#!/usr/bin/env python 
from collections import defaultdict
from  sys  import argv,stdin
from pathlib import Path

# Tally alignments per reference name from SAM-style tab-separated text on
# stdin and write the per-reference counts, followed by two group totals, to
# "<directory of argv[2]>/<argv[2] basename up to its first dot>_neibiao.xls".
# Note: argv[1] is not read by this script.


def is_counted_cigar(cigar):
    """Return True when *cigar* passes the script's alignment filter.

    The filter accepts a string of exactly three characters whose first
    ``'M'`` is the last character.  For a well-formed CIGAR string that is a
    two-digit all-match alignment such as ``50M``; ``9M``, ``100M`` and
    ``5M1I4M`` are all rejected.
    """
    return len(cigar) == 3 and cigar.find('M') == 2


def tally_alignments(lines):
    """Count accepted alignments per reference name.

    Args:
        lines: iterable of text lines.  Blank lines and lines starting with
            ``'@'`` are skipped; every other line is stripped and split on
            tabs, with column 3 used as the reference name and column 6 as
            the CIGAR string.

    Returns:
        A ``(dna_counts, cf_counts)`` pair of dicts mapping reference name to
        the number of accepted alignments.  References whose name contains
        ``'GCA'`` go into ``dna_counts``; all others go into ``cf_counts``.
        Keys appear in first-seen order.

    Raises:
        ValueError: if a record has fewer than six tab-separated columns.
    """
    # Only the counts are ever reported, so keep integers instead of
    # accumulating every query name in memory.
    dna_counts = defaultdict(int)
    cf_counts = defaultdict(int)
    for line_no, raw in enumerate(lines, start=1):
        if not raw.strip():
            continue
        if raw.startswith('@'):
            continue
        fields = raw.strip().split('\t')
        if len(fields) < 6:
            raise ValueError(
                f"line {line_no}: expected at least 6 tab-separated "
                f"columns, got {len(fields)}"
            )
        ref_name = fields[2]
        if not is_counted_cigar(fields[5]):
            continue
        if 'GCA' in ref_name:
            dna_counts[ref_name] += 1
        else:
            cf_counts[ref_name] += 1
    return dna_counts, cf_counts


def report_lines(dna_counts, cf_counts):
    """Return the report as a list of newline-terminated TSV lines.

    Layout: a header row, one row per key of *dna_counts*, one row per key of
    *cf_counts*, then the ``all_DNA`` and ``all_CF`` totals.
    """
    rows = ['S1_id\tS1_number\n']
    rows.extend(f'{name}\t{count}\n' for name, count in dna_counts.items())
    rows.extend(f'{name}\t{count}\n' for name, count in cf_counts.items())
    rows.append(f'all_DNA\t{sum(dna_counts.values())}\n')
    rows.append(f'all_CF\t{sum(cf_counts.values())}\n')
    return rows


def main():
    """Read alignments from stdin and write the report next to argv[2]."""
    if len(argv) < 3:
        raise SystemExit(
            "usage: <script> <unused> <path>   (alignment text on stdin)"
        )
    target = Path(argv[2]).absolute()
    prefix = target.name.split('.')[0]
    out_path = target.parent / f"{prefix}_neibiao.xls"
    # Open the output before consuming stdin so an unwritable path fails
    # fast; the context manager closes the file even if parsing raises.
    with open(out_path, 'w') as out:
        dna_counts, cf_counts = tally_alignments(stdin)
        out.writelines(report_lines(dna_counts, cf_counts))


if __name__ == "__main__":
    main()
